# @Author: 唐奇才
# @Time: 2021/5/25 7:05
# @File: SpiderProsyFromWeb.py
# @Software: PyCharm

import requests
from SpiderUtils.myUtils import myFakerHeaders
from bs4 import BeautifulSoup
import json

# Page-number template for 66ip.cn's free-proxy listing; format() with a
# 1-based page index.  NOTE(review): the name contains a typo ("ULR" for
# "URL") but is kept as-is to avoid breaking callers.
baseULR = "http://www.66ip.cn/{}.html"


def get_html(url, timeout=10):
    """Fetch *url* with a randomized fake User-Agent header.

    Returns the raw ``requests.Response`` object rather than ``.text`` so the
    caller can set ``response.encoding`` before decoding (the target site is
    GB2312, and per the original author's note some responses may be images).

    :param url: page URL to fetch.
    :param timeout: seconds before the request aborts (new parameter with a
                    default, so existing callers are unaffected; without it a
                    stalled connection would hang the crawl forever).
    :return: the ``requests.Response`` for *url*.
    :raises requests.RequestException: on connection failure or timeout.
    """
    header = {'User-Agent': myFakerHeaders.getFakeHeaders()}
    return requests.get(url=url, headers=header, timeout=timeout)


def getProxy(pages=100):
    """Scrape HTTP proxies from 66ip.cn and append them to ``./proxy.json``.

    Each proxy is written as one JSON object per line (JSON Lines), e.g.
    ``{"http": "http://1.2.3.4:8080"}`` — the original wrote Python dict
    reprs (single quotes, trailing commas), which is not parseable JSON.

    :param pages: exclusive upper bound of the page index; pages 1..pages-1
                  are fetched (default matches the original 1..99 range).
    """
    # Distinct name for the page index: the original reused `i` for both the
    # page loop and the row loop, shadowing the outer counter.
    for page in range(1, pages):
        html = get_html(baseULR.format(page))
        # The site serves GB2312-encoded pages; requests would guess wrong.
        html.encoding = "gb2312"
        bs = BeautifulSoup(html.text, "html.parser")
        # The proxy listing is the last <table> on the page.
        table = bs.select("table")[-1]
        rows = table.find_all("tr")
        # Open once per page; the original reopened the file for every row.
        with open("./proxy.json", "a", encoding="utf-8") as f:
            for row in rows[1:]:  # rows[0] is the header row
                cells = row.find_all("td")  # hoisted: was called twice per row
                proxy = {"http": "http://" + cells[0].text + ":" + cells[1].text}
                f.write(json.dumps(proxy) + "\n")
        print(page)



def main():
    """Entry point: crawl the proxy listing and persist the results."""
    getProxy()


# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()
